InΒ [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
InΒ [2]:
# Load the raw CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('Unemployment in India.csv')
InΒ [3]:
# Plain-text dump of the frame (pandas elides the middle rows). The trailing
# rows shown are entirely NaN; they are dropped in a later cell.
print(df)
             Region         Date  Frequency   Estimated Unemployment Rate (%)  \
0    Andhra Pradesh   31-05-2019    Monthly                              3.65   
1    Andhra Pradesh   30-06-2019    Monthly                              3.05   
2    Andhra Pradesh   31-07-2019    Monthly                              3.75   
3    Andhra Pradesh   31-08-2019    Monthly                              3.32   
4    Andhra Pradesh   30-09-2019    Monthly                              5.17   
..              ...          ...        ...                               ...   
763             NaN          NaN        NaN                               NaN   
764             NaN          NaN        NaN                               NaN   
765             NaN          NaN        NaN                               NaN   
766             NaN          NaN        NaN                               NaN   
767             NaN          NaN        NaN                               NaN   

      Estimated Employed   Estimated Labour Participation Rate (%)   Area  
0             11999139.0                                     43.24  Rural  
1             11755881.0                                     42.05  Rural  
2             12086707.0                                     43.50  Rural  
3             12285693.0                                     43.97  Rural  
4             12256762.0                                     44.68  Rural  
..                   ...                                       ...    ...  
763                  NaN                                       NaN    NaN  
764                  NaN                                       NaN    NaN  
765                  NaN                                       NaN    NaN  
766                  NaN                                       NaN    NaN  
767                  NaN                                       NaN    NaN  

[768 rows x 7 columns]
InΒ [4]:
# Preview the first five rows (all columns).
df.head()
Out[4]:
Region Date Frequency Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%) Area
0 Andhra Pradesh 31-05-2019 Monthly 3.65 11999139.0 43.24 Rural
1 Andhra Pradesh 30-06-2019 Monthly 3.05 11755881.0 42.05 Rural
2 Andhra Pradesh 31-07-2019 Monthly 3.75 12086707.0 43.50 Rural
3 Andhra Pradesh 31-08-2019 Monthly 3.32 12285693.0 43.97 Rural
4 Andhra Pradesh 30-09-2019 Monthly 5.17 12256762.0 44.68 Rural
InΒ [5]:
# Preview the last five rows (all columns); in the raw file these are all-NaN rows.
df.tail()
Out[5]:
Region Date Frequency Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%) Area
763 NaN NaN NaN NaN NaN NaN NaN
764 NaN NaN NaN NaN NaN NaN NaN
765 NaN NaN NaN NaN NaN NaN NaN
766 NaN NaN NaN NaN NaN NaN NaN
767 NaN NaN NaN NaN NaN NaN NaN
InΒ [6]:
# (number of rows, number of columns) of the raw dataset
df.shape
Out[6]:
(768, 7)
InΒ [7]:
# Count missing values per column.
df.isnull().sum()
Out[7]:
Region                                      28
 Date                                       28
 Frequency                                  28
 Estimated Unemployment Rate (%)            28
 Estimated Employed                         28
 Estimated Labour Participation Rate (%)    28
Area                                        28
dtype: int64
InΒ [8]:
# Discard every row that contains at least one missing value. Rebinding `df`
# (instead of mutating in place) keeps the cell safe to re-run.
df = df.dropna()
InΒ [9]:
# Re-check after the drop: every column should now report zero missing values.
df.isnull().sum()
Out[9]:
Region                                      0
 Date                                       0
 Frequency                                  0
 Estimated Unemployment Rate (%)            0
 Estimated Employed                         0
 Estimated Labour Participation Rate (%)    0
Area                                        0
dtype: int64
InΒ [10]:
# Data type of each column
df.dtypes
Out[10]:
Region                                       object
 Date                                        object
 Frequency                                   object
 Estimated Unemployment Rate (%)            float64
 Estimated Employed                         float64
 Estimated Labour Participation Rate (%)    float64
Area                                         object
dtype: object
InΒ [11]:
# Column labels; note that several of them carry a leading space in the raw file.
df.columns
Out[11]:
Index(['Region', ' Date', ' Frequency', ' Estimated Unemployment Rate (%)',
       ' Estimated Employed', ' Estimated Labour Participation Rate (%)',
       'Area'],
      dtype='object')
InΒ [12]:
# Trim surrounding whitespace from every column label, then display the frame
# to confirm the cleaned headers.
df.columns = [label.strip() for label in df.columns]
df
Out[12]:
Region Date Frequency Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%) Area
0 Andhra Pradesh 31-05-2019 Monthly 3.65 11999139.0 43.24 Rural
1 Andhra Pradesh 30-06-2019 Monthly 3.05 11755881.0 42.05 Rural
2 Andhra Pradesh 31-07-2019 Monthly 3.75 12086707.0 43.50 Rural
3 Andhra Pradesh 31-08-2019 Monthly 3.32 12285693.0 43.97 Rural
4 Andhra Pradesh 30-09-2019 Monthly 5.17 12256762.0 44.68 Rural
... ... ... ... ... ... ... ...
749 West Bengal 29-02-2020 Monthly 7.55 10871168.0 44.09 Urban
750 West Bengal 31-03-2020 Monthly 6.67 10806105.0 43.34 Urban
751 West Bengal 30-04-2020 Monthly 15.63 9299466.0 41.20 Urban
752 West Bengal 31-05-2020 Monthly 15.22 9240903.0 40.67 Urban
753 West Bengal 30-06-2020 Monthly 9.86 9088931.0 37.57 Urban

740 rows × 7 columns

InΒ [13]:
# Number of rows that are exact duplicates of an earlier row
print(df.duplicated().sum())
0
InΒ [14]:
# Summary of the index, column dtypes, non-null counts and memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 740 entries, 0 to 753
Data columns (total 7 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Region                                   740 non-null    object 
 1   Date                                     740 non-null    object 
 2   Frequency                                740 non-null    object 
 3   Estimated Unemployment Rate (%)          740 non-null    float64
 4   Estimated Employed                       740 non-null    float64
 5   Estimated Labour Participation Rate (%)  740 non-null    float64
 6   Area                                     740 non-null    object 
dtypes: float64(3), object(4)
memory usage: 46.2+ KB
InΒ [15]:
# Descriptive statistics for the numeric columns
df.describe()
Out[15]:
Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%)
count 740.000000 7.400000e+02 740.000000
mean 11.787946 7.204460e+06 42.630122
std 10.721298 8.087988e+06 8.111094
min 0.000000 4.942000e+04 13.330000
25% 4.657500 1.190404e+06 38.062500
50% 8.350000 4.744178e+06 41.160000
75% 15.887500 1.127549e+07 45.505000
max 76.740000 4.577751e+07 72.570000
InΒ [16]:
# Convert 'Date' to datetime. The raw strings are day-first (e.g. 31-05-2019),
# so say so explicitly instead of leaving pandas to guess the field order.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

# Month/year label used as the x-axis in the plots below.
# NOTE: this is a string, so it sorts lexicographically ('01 2020' < '05 2019'),
# not chronologically.
df['MM YYYY'] = df['Date'].dt.strftime('%m %Y')
InΒ [17]:
# Display the frame to confirm the parsed dates and the new 'MM YYYY' column.
df
Out[17]:
Region Date Frequency Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%) Area MM YYYY
0 Andhra Pradesh 2019-05-31 Monthly 3.65 11999139.0 43.24 Rural 05 2019
1 Andhra Pradesh 2019-06-30 Monthly 3.05 11755881.0 42.05 Rural 06 2019
2 Andhra Pradesh 2019-07-31 Monthly 3.75 12086707.0 43.50 Rural 07 2019
3 Andhra Pradesh 2019-08-31 Monthly 3.32 12285693.0 43.97 Rural 08 2019
4 Andhra Pradesh 2019-09-30 Monthly 5.17 12256762.0 44.68 Rural 09 2019
... ... ... ... ... ... ... ... ...
749 West Bengal 2020-02-29 Monthly 7.55 10871168.0 44.09 Urban 02 2020
750 West Bengal 2020-03-31 Monthly 6.67 10806105.0 43.34 Urban 03 2020
751 West Bengal 2020-04-30 Monthly 15.63 9299466.0 41.20 Urban 04 2020
752 West Bengal 2020-05-31 Monthly 15.22 9240903.0 40.67 Urban 05 2020
753 West Bengal 2020-06-30 Monthly 9.86 9088931.0 37.57 Urban 06 2020

740 rows × 8 columns

InΒ [18]:
# Row count for each distinct 'Frequency' value
df.value_counts('Frequency')
Out[18]:
Frequency
Monthly     381
 Monthly    359
Name: count, dtype: int64
InΒ [19]:
# Trim surrounding whitespace so that variants such as ' Monthly' collapse into
# 'Monthly' (covers trailing/multiple spaces too, not just one exact spelling).
df['Frequency'] = df['Frequency'].str.strip()
InΒ [20]:
# Re-check: only a single 'Frequency' value should remain after the cleanup.
df.value_counts('Frequency')
Out[20]:
Frequency
Monthly    740
Name: count, dtype: int64
InΒ [21]:
# Number of records available for each region
df.value_counts('Region')
Out[21]:
Region
Andhra Pradesh      28
Karnataka           28
Uttar Pradesh       28
Tripura             28
Telangana           28
Tamil Nadu          28
Rajasthan           28
Punjab              28
Odisha              28
Maharashtra         28
Kerala              28
Madhya Pradesh      28
Jharkhand           28
Himachal Pradesh    28
Haryana             28
Gujarat             28
Delhi               28
Chhattisgarh        28
Bihar               28
West Bengal         28
Meghalaya           27
Uttarakhand         27
Assam               26
Puducherry          26
Goa                 24
Jammu & Kashmir     21
Sikkim              17
Chandigarh          12
Name: count, dtype: int64
InΒ [22]:
# Number of distinct regions in the dataset
df['Region'].nunique()
Out[22]:
28
InΒ [23]:
# Number of records for each area type
df.value_counts('Area')
Out[23]:
Area
Urban    381
Rural    359
Name: count, dtype: int64
InΒ [24]:
# Share of records per area type, drawn as a pie with a hole in the middle.
# The slices reflect row counts, not population or employment figures.
area_count = df['Area'].value_counts()
fig = px.pie(area_count, 
             values=area_count.values, 
             names=area_count.index, 
             title='Area Distribution', 
             hole=0.3)
fig.show()
InΒ [25]:
# Share of records per region, drawn as a pie with a hole in the middle.
# The slices reflect how many rows each region contributes to the dataset.
region_count = df['Region'].value_counts()
fig = px.pie(region_count, 
             values=region_count.values, 
             names=region_count.index, 
             title='Region Distribution', 
             hole=0.3)
fig.show()
InΒ [26]:
# Split the records by area type for the per-area charts further down.
rural = df.loc[df['Area'] == 'Rural']
urban = df.loc[df['Area'] == 'Urban']
InΒ [27]:
# Mean rural unemployment rate per 'MM YYYY' label. The string alias 'mean' is
# used because recent pandas versions deprecate passing np.mean as `aggfunc`.
# NOTE: rows are ordered by the label string, so they are not chronological.
rural.pivot_table(index='MM YYYY', values='Estimated Unemployment Rate (%)', aggfunc='mean')
Out[27]:
Estimated Unemployment Rate (%)
MM YYYY
01 2020 7.842692
02 2020 8.752308
03 2020 9.683333
04 2020 21.746000
05 2019 7.068077
05 2020 21.210800
06 2019 8.201154
06 2020 11.825200
07 2019 7.741923
08 2019 8.503077
09 2019 7.036800
10 2019 9.051111
11 2019 8.432222
12 2019 8.233600
InΒ [28]:
# Rural unemployment rate per month as a bar chart; errorbar=('ci', 0)
# effectively suppresses the error bars.
plt.figure(figsize=(14, 7))
ax = sns.barplot(
    data=rural,
    x='MM YYYY',
    y='Estimated Unemployment Rate (%)',
    errorbar=('ci', 0),
    palette='pastel',
)
ax.set_xlabel('MM YYYY')
ax.set_ylabel('Unemployment Rate (%)')
ax.set_title("Rural - Unemployment Rate (%)")
plt.xticks(rotation=45)
plt.show()
No description has been provided for this image
InΒ [29]:
# Urban unemployment rate per month as a bar chart; errorbar=('ci', 0)
# effectively suppresses the error bars.
plt.figure(figsize=(14, 7))
ax = sns.barplot(
    data=urban,
    x='MM YYYY',
    y='Estimated Unemployment Rate (%)',
    errorbar=('ci', 0),
)
ax.set_xlabel('Month-Year')
ax.set_ylabel('Unemployment Rate (%)')
# Trailing semicolon keeps the returned Text object out of the cell output.
ax.set_title("Urban - Unemployment Rate (%)");
No description has been provided for this image
InΒ [30]:
# Sunburst with the month/year label as the inner ring and area as the outer ring.
# NOTE(review): sector sizes presumably come from summing `values` within each
# group (confirm against the plotly docs); a sum of percentage rates grows with
# the number of reporting regions, so it is not an average rate.
fig = px.sunburst(df, path=['MM YYYY', 'Area'], values='Estimated Unemployment Rate (%)', title='Sunburst Plot of Unemployment Rate Comparison')
fig.show()
InΒ [31]:
from matplotlib.ticker import FuncFormatter
# Keep only the rural records for the "Rural" chart drawn in this cell.
# `pd.DataFrame(df)` would bind `rural` to the entire dataset (urban rows
# included), which makes every later "Rural" plot show all areas.
rural = df[df['Area'] == 'Rural']
def millions_formatter(x, pos):
    """Render a tick value in millions with one decimal place, e.g. '1.5M'.

    `pos` is accepted because the function is passed to FuncFormatter, which
    supplies two arguments; it is not used.
    """
    scaled = x * 1e-6
    return f'{scaled:.1f}M'
# Bar chart of 'Estimated Employed' per month for the frame held in `rural`;
# the y-axis ticks are rendered in millions.
plt.figure(figsize=(10, 5))
ax = sns.barplot(
    data=rural,
    x='MM YYYY',
    y='Estimated Employed',
    errorbar=('ci', 0),
    palette='Set2',
)
ax.set_xlabel('Month-Year')
ax.set_ylabel('Estimated Employed')
plt.xticks(rotation=45)
ax.set_title("Rural - Estimated Employed")
ax.yaxis.set_major_formatter(FuncFormatter(millions_formatter))
plt.show()
No description has been provided for this image
InΒ [32]:
# `FuncFormatter` and `millions_formatter` come from the "Rural - Estimated
# Employed" cell directly above, so they are not imported/defined again here.
# This cell also must not reassign `rural`: binding it to the whole dataset
# would corrupt the later "Rural" charts, and this chart only needs `urban`.
plt.figure(figsize=(10, 5))
sns.barplot(x='MM YYYY', y='Estimated Employed', data=urban, errorbar=('ci', 0), palette='Set2')
plt.xlabel('Month-Year')
plt.ylabel('Estimated Employed')
plt.xticks(rotation=45)
plt.title("Urban - Estimated Employed")
# Show the y-axis ticks in millions (e.g. 1.5M).
plt.gca().yaxis.set_major_formatter(FuncFormatter(millions_formatter))
plt.show()
No description has been provided for this image
InΒ [33]:
# Sunburst of the 'Estimated Employed' counts, month/year inside and area outside.
# The title names the plotted column: these are counts, not an employment rate.
fig = px.sunburst(df, path=['MM YYYY', 'Area'], values='Estimated Employed', title='Sunburst Plot of Estimated Employed Comparison')
fig.show()
InΒ [34]:
# Build the rural subset locally so this "Rural" chart does not depend on
# whatever earlier cells last assigned to `rural`.
rural = df[df['Area'] == 'Rural']

plt.figure(figsize=(12, 6))
sns.barplot(x='MM YYYY', y='Estimated Labour Participation Rate (%)', data=rural, errorbar=('ci', 0), palette='Set2')
plt.xlabel('Month-Year')
# Same y-label as the urban counterpart, including the unit.
plt.ylabel('Labour Participation Rate (%)')
plt.title("Rural - Labour Participation Rate");
No description has been provided for this image
InΒ [35]:
# Urban labour participation rate per month as a bar chart; errorbar=('ci', 0)
# effectively suppresses the error bars.
plt.figure(figsize=(12, 6))
ax = sns.barplot(
    data=urban,
    x='MM YYYY',
    y='Estimated Labour Participation Rate (%)',
    errorbar=('ci', 0),
    palette='Set2',
)
ax.set_xlabel('Month-Year')
ax.set_ylabel('Labour Participation Rate (%)')
# Trailing semicolon keeps the returned Text object out of the cell output.
ax.set_title("Urban - Labour Participation Rate");
No description has been provided for this image
InΒ [36]:
# Scatter of each record's labour participation rate by month/year, coloured by area.
px.scatter(df,x='MM YYYY',y='Estimated Labour Participation Rate (%)',color='Area')
InΒ [37]:
# Unemployment rate over time across all records, one marker per month label.
plt.figure(figsize=(12, 6))
ax = sns.lineplot(
    data=df,
    x='MM YYYY',
    y='Estimated Unemployment Rate (%)',
    marker='o',
)
ax.set_title('Estimated Unemployment Rate (%) Over Time')
ax.set_xlabel('MM YYYY')
ax.set_ylabel('Estimated Unemployment Rate (%)')
plt.xticks(rotation=45)
ax.grid(True)
plt.show()
No description has been provided for this image
InΒ [38]:
# Bar chart of labour participation rate by date, coloured by date.
# NOTE(review): every row is passed in, so bars for a date presumably stack the
# per-region values (heights would then be sums of rates) — confirm.
figure = px.bar(df, x = 'Date', y = 'Estimated Labour Participation Rate (%)', color = 'Date', title = 'Estimated Labour Participation Rate (%)')
figure.show()
InΒ [39]:
# Mean unemployment rate per region, plus the regions at either extreme.
average_unemployment_rate = (
    df.groupby('Region')['Estimated Unemployment Rate (%)']
    .mean()
)

# Region labels with the highest / lowest per-region mean
state_with_highest_unemployment_rate = average_unemployment_rate.idxmax()
state_with_lowest_unemployment_rate = average_unemployment_rate.idxmin()

# The corresponding mean values
highest_unemployment_rate = average_unemployment_rate.max()
lowest_unemployment_rate = average_unemployment_rate.min()

print("State with highest unemployment rate:", state_with_highest_unemployment_rate)
print("Highest unemployment rate:", highest_unemployment_rate)
print("State with lowest unemployment rate:", state_with_lowest_unemployment_rate)
print("Lowest unemployment rate:", lowest_unemployment_rate)
State with highest unemployment rate: Tripura
Highest unemployment rate: 28.350357142857142
State with lowest unemployment rate: Meghalaya
Lowest unemployment rate: 4.7988888888888885
InΒ [40]:
# Per-region mean unemployment rate, highest first, as a bar chart.
sns.set_palette("Set1")
plt.figure(figsize=(12, 6))
ax = average_unemployment_rate.sort_values(ascending=False).plot.bar()
ax.set_title("Average Unemployment Rate by State")
ax.set_xlabel("Region")
ax.set_ylabel("Average Unemployment Rate (%)")
plt.xticks(rotation=90)
plt.show()
No description has been provided for this image
InΒ [41]:
# Bar chart of 'Estimated Employed' by date, coloured by date.
# NOTE(review): every row is passed in, so bars for a date presumably stack the
# per-region/area counts — confirm.
figure = px.bar(df, x = 'Date', y = 'Estimated Employed', color = 'Date', title = 'Estimated Employed People')
figure.show()
InΒ [42]:
# Mean of 'Estimated Employed' per region, plus the regions at either extreme.
# The names below say "rate", but the values are absolute counts, not percentages.
average_employment_rate = (
    df.groupby('Region')['Estimated Employed']
    .mean()
)

# Region labels with the highest / lowest mean employed count
state_with_highest_employment_rate = average_employment_rate.idxmax()
state_with_lowest_employment_rate = average_employment_rate.idxmin()

# The corresponding mean values
highest_employment_rate = average_employment_rate.max()
lowest_employment_rate = average_employment_rate.min()

print("State with highest employment rate:", state_with_highest_employment_rate)
print("Highest employment rate:", highest_employment_rate)
print("State with lowest employment rate:", state_with_lowest_employment_rate)
print("Lowest employment rate:", lowest_employment_rate)
State with highest employment rate: Uttar Pradesh
Highest employment rate: 28094832.17857143
State with lowest employment rate: Sikkim
Lowest employment rate: 106880.70588235294
InΒ [43]:
# `FuncFormatter` and `millions_formatter` are already available from the
# "Rural - Estimated Employed" cell earlier in the notebook, so they are not
# imported/defined again. This cell also must not reassign `rural`: it is not
# used here, and binding it to the whole dataset clobbers the rural subset.
sns.set_palette('Set2')
plt.figure(figsize=(10, 5))
# Per-region mean of 'Estimated Employed', highest first.
average_employment_rate.sort_values(ascending=False).plot(kind='bar')
plt.xlabel('Region')
# The plotted values are mean counts shown in millions, not a percentage rate.
plt.ylabel('Average Estimated Employed')
plt.xticks(rotation=90)
plt.title("Average Estimated Employed by State")
plt.gca().yaxis.set_major_formatter(FuncFormatter(millions_formatter))
plt.show()
No description has been provided for this image
InΒ [44]:
# Mean labour participation rate per region, plus the regions at either extreme.
average_labour_participation_rate = (
    df.groupby('Region')['Estimated Labour Participation Rate (%)']
    .mean()
)

# Region labels with the highest / lowest per-region mean
state_with_highest_labour_participation_rate = average_labour_participation_rate.idxmax()
state_with_lowest_labour_participation_rate = average_labour_participation_rate.idxmin()

# The corresponding mean values
highest_labour_participation_rate = average_labour_participation_rate.max()
lowest_labour_participation_rate = average_labour_participation_rate.min()

print("State with highest labour participation rate:", state_with_highest_labour_participation_rate)
print("Highest labour participation rate:", highest_labour_participation_rate)
print("State with lowest labour participation rate:", state_with_lowest_labour_participation_rate)
print("Lowest labour participation rate:", lowest_labour_participation_rate)
State with highest labour participation rate: Tripura
Highest labour participation rate: 61.82392857142857
State with lowest labour participation rate: Uttarakhand
Lowest labour participation rate: 33.775555555555556
InΒ [45]:
# Per-region mean labour participation rate, highest first, as a bar chart.
sns.set_palette("viridis")
plt.figure(figsize=(12, 6))
ax = average_labour_participation_rate.sort_values(ascending=False).plot.bar()
ax.set_title("Average labour participation Rate by State")
ax.set_xlabel("Region")
ax.set_ylabel("Average labour participation Rate (%)")
plt.xticks(rotation=90)
plt.show()
No description has been provided for this image
InΒ [46]:
# Sunburst with area as the inner ring and region as the outer ring.
# NOTE(review): sector sizes presumably come from summing the unemployment-rate
# values within each group (confirm against the plotly docs), so regions with
# more records look larger regardless of their typical rate.
fig = px.sunburst(df, path=['Area', 'Region'], values='Estimated Unemployment Rate (%)', title='Sunburst Plot of Estimated Unemployment Rate by Region and Area')
fig.show()
InΒ [47]:
# Pairwise correlation (pandas' default method) between the three numeric indicators.
correlation = df[['Estimated Unemployment Rate (%)', 'Estimated Employed', 'Estimated Labour Participation Rate (%)']].corr()

plt.figure(figsize=(10, 8))
# Let seaborn write the coefficients into the cells: annot=True with fmt='.2f'
# gives two-decimal labels without a hand-written plt.text loop, and seaborn
# picks a text colour that contrasts with each cell instead of forcing white,
# which is hard to read on the light mid-range colours of 'coolwarm'.
sns.heatmap(correlation, cmap='coolwarm', annot=True, fmt='.2f')
plt.title('Correlation Heatmap')
plt.show()
No description has been provided for this image

Conclusion:

  1. The unemployment rate in rural areas exceeded that in urban areas from May 2019 to May 2020.
  2. However, by June 2020, the unemployment rates in both urban and rural areas had nearly equalized.
  3. Notably, in April and May 2020, the unemployment rate surged significantly, coinciding with the onset of the COVID-19 pandemic's economic impact.
  4. The rural areas have a higher employed population compared to urban areas.
  5. However, there was a sudden decrease in the employed population during April and May 2020.
  6. The labor participation rate is higher in rural areas than in urban areas.
  7. However, there was a sudden decrease in the labor participation rate in April 2020.
  8. The state with the highest average unemployment rate is Tripura, while the state with the lowest is Meghalaya.
  9. The state with the highest average number of estimated employed people is Uttar Pradesh, while the state with the lowest is Sikkim.
  10. The state with the highest average labour participation rate is Tripura, while the state with the lowest is Uttarakhand.
  11. The estimated number of employed people was highest on January 31, 2020.
  12. From the given data set, the labour participation rate was high in October and November 2019.